# Load libraries and other custom code.
library(easypackages)
libraries("here","ggplot2","ggridges","reshape2","patchwork","RColorBrewer",
"plotrix","tidyverse","cluster","ggpackets","wordcloud","proxy",
"ggeasy","NbClust","MASS","robustbase","MKinfer","impute","ggheatmap",
"janitor", "lme4", "lmtest","matlabr","psych")
# location of the MATLAB binary used later by matlabr::run_matlab_code()
options(matlab.path = "/Applications/MATLAB_R2021b.app/bin")
# project-relative paths
codepath = here("code")
resultpath = here("results")
plot_path = here("plots")
source(file.path(codepath,"utils.R"))
plot_title_name = "IIT" # site label used in plot titles
WEIGHT = "none" # weighting tag embedded in the result file names
nstimuli = 700 # total number of stimuli
fdr_thresh = 0.05 # FDR threshold for calling results significant
nperm = 1000 # number of permutations for null distributions
# Build stimulus names ("stim001".."stim700") and 4-digit stimulus ids
# ("1001".."1700"). sprintf() is vectorized, so no explicit loop is needed.
stim_names = sprintf("stim%03d", seq_len(nstimuli))
stim_ids = sprintf("1%03d", seq_len(nstimuli))
# read in semantic feature file that tells us which features go along with which stimuli
semantic_features_annot =read.csv(file.path(codepath, "semantic_features.csv"))
# feature names = every column except the first (stimulus id)
semantic_features = colnames(semantic_features_annot)[2:ncol(semantic_features_annot)]
# additional custom semantic features, appended column-wise to the first set
semantic_features_annot2 =read.csv(file.path(codepath, "semantic_features_custom.csv"))
semantic_features2 = colnames(semantic_features_annot2)[2:ncol(semantic_features_annot2)]
# NOTE(review): cbind assumes both csv files list stimuli in the same row order -- confirm
semantic_features_annot = cbind(semantic_features_annot, semantic_features_annot2[,semantic_features2])
semantic_features = colnames(semantic_features_annot)[2:ncol(semantic_features_annot)]
# compute some extra categories from combinations of existing categories
# social = annotated as both face and human; nonsocial = neither human nor animal
semantic_features_annot$social = (semantic_features_annot[,c("face")]==1 & semantic_features_annot[,c("human")]==1)*1
semantic_features_annot$nonsocial = (semantic_features_annot[,c("human")]==0 & semantic_features_annot[,c("animal")]==0)*1
semantic_features = colnames(semantic_features_annot)[2:ncol(semantic_features_annot)]
# masks for each semantic category:
# stim_masks[[feature]] = stimulus ids annotated with that feature
stim_masks = list()
for (sem_feat in semantic_features){
stim_masks[[sem_feat]] = as.character(semantic_features_annot$stimulus[semantic_features_annot[,sem_feat]==1])
}
# classifier output [0,1] subjects BY stimuli
fname = file.path(resultpath, sprintf("weight_%s_classification_accuracy_perSubStim.csv",WEIGHT))
classifier_output_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
# relabel columns with the 4-digit stimulus ids
colnames(classifier_output_sub_by_stim) = stim_ids
# masks for each subject: which stimuli were correctly fingerprinted (value == 1)
sublist = rownames(classifier_output_sub_by_stim)
fp_masks = list()
for (subid in sublist){
fp_masks[[subid]] = colnames(classifier_output_sub_by_stim)[classifier_output_sub_by_stim[subid,]==1]
}
# classifier output [0,1] stimuli BY perm
fname = file.path(resultpath, sprintf("weight_%s_classification_accuracy_perStim_perm.csv",WEIGHT))
classifier_output_stim_by_perm = read.csv(fname, row.names=1, na.strings = "NaN")
# fingerprint ratios subject by stimulus
fname = file.path(resultpath, sprintf("weight_%s_fingerprintratios_perSubStim.csv",WEIGHT))
fpratio_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
colnames(fpratio_sub_by_stim) = stim_ids
# mean fingerprint ratios stimulus by perm
fname = file.path(resultpath, sprintf("weight_%s_mean_fingerprintratios_perStim_perm.csv",WEIGHT))
mean_fpratio_stim_by_perm = read.csv(fname, row.names=1, na.strings = "NaN")
# intrasubject correlation subject by stimulus
fname = file.path(resultpath, sprintf("weight_%s_intrasubjectR_perSubStim.csv",WEIGHT))
intrasubR_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
colnames(intrasubR_sub_by_stim) = stim_ids
# mean intersubject correlation subject by stimulus
fname = file.path(resultpath, sprintf("weight_%s_mean_intersubjectR_perSubStim.csv",WEIGHT))
mean_intersubR_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
colnames(mean_intersubR_sub_by_stim) = stim_ids
# nfingerprintable stimuli per subject
fname = file.path(resultpath, sprintf("weight_%s_classification_nfingerprintableStimuli_perSub.csv",WEIGHT))
nfpstim_per_subject_original = read.csv(fname, row.names=1, na.strings = "NaN")
# weight nfp stimuli by the missing values in the intra correlation matrix
# (use nstimuli rather than the hard-coded 700 so the constant is defined once)
# number of complete (non-NA) observations per subject
nfpstim_per_subject_original$missing_tot <- nstimuli - rowSums(is.na(intrasubR_sub_by_stim))
# fraction of stimuli with complete observations
nfpstim_per_subject_original$missing_ratio <- (nfpstim_per_subject_original$missing_tot/nstimuli)
# multiply the complete-obs ratio by the number of fingerprintable stimuli
# (this reduces the count of fp stimuli for participants with missing data)
nfpstim_per_subject_original <- nfpstim_per_subject_original %>% mutate(fpstimuli_ratioed = (round(missing_ratio*nfingerprintable_stimuli, 0)))
# create a new dataframe that contains the weighted number of fp stimuli
nfpstim_per_subject <- data.frame(nfpstim_per_subject_original$fpstimuli_ratioed)
rownames(nfpstim_per_subject) <- rownames(nfpstim_per_subject_original)
nfpstim_per_subject <- nfpstim_per_subject %>% rename(nfingerprintable_stimuli = nfpstim_per_subject_original.fpstimuli_ratioed)
# nfingerprintable stimuli subject by perm, down-weighted the same way
fname = file.path(resultpath, sprintf("weight_%s_classification_nfingerprintableStimuli_perSub_perm.csv",WEIGHT))
nfpstim_sub_by_perm_original = read.csv(fname, row.names=1, na.strings = "NaN")
nfpstim_sub_by_perm = round(nfpstim_sub_by_perm_original*nfpstim_per_subject_original$missing_ratio, 0)
# read in phenotypic data
pheno_data = read.csv(here("data","pheno","pheno_OSIE.csv"))
# exclude subjects missing intrasubject correlations for >15% of the stimuli
# (nstimuli rather than the hard-coded 700, consistent with the rest of the script)
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)
subs2exclude = rownames(intrasubR_sub_by_stim)[na_mask]
# keep pheno rows for subjects that have eye-tracking data and are not excluded
mask1 = is.element(pheno_data$subj_ID,rownames(intrasubR_sub_by_stim))
mask2 = !is.element(pheno_data$subj_ID, subs2exclude)
mask = mask1 & mask2
pheno_data_sub = pheno_data %>% filter(mask)
# descriptive statistics on the included subjects
describe(pheno_data_sub)
## vars n mean sd median trimmed mad min max
## subj_ID* 1 105 53.00 30.45 53.00 53.00 38.55 1.00 105.00
## sex* 2 105 1.50 0.50 2.00 1.51 0.00 1.00 2.00
## age 3 105 26.21 6.30 25.00 25.34 4.45 18.00 50.00
## IQ 4 91 108.65 13.09 110.00 108.99 10.38 74.50 138.00
## test_retest_delay 5 105 7.33 1.46 7.00 7.00 0.00 4.00 16.00
## is_imagelist_shared 6 105 0.22 0.42 0.00 0.15 0.00 0.00 1.00
## S1_CalError_OSIE 7 105 0.48 0.30 0.42 0.44 0.17 0.11 2.59
## S2_CalError_OSIE 8 105 0.48 0.22 0.43 0.45 0.17 0.11 1.38
## Diff_CalError_OSIE 9 105 0.20 0.24 0.14 0.16 0.11 0.00 2.07
## AQ_Tot 10 105 16.92 6.72 16.00 16.55 4.45 4.00 38.00
## AQ_Soc 11 105 3.14 3.05 2.00 2.67 1.48 0.00 13.00
## AQ_Comm 12 105 1.66 1.59 1.00 1.46 1.48 0.00 8.00
## AQ_AttDet 13 105 3.46 1.92 3.00 3.41 2.97 0.00 7.00
## SRS_Tot_Raw 14 105 45.16 22.77 42.00 43.28 19.27 9.00 151.00
## SRS_DSM5_SOC 15 105 26.18 13.51 23.00 25.00 11.86 5.00 83.00
## SRS_DSM5_RRB 16 105 18.98 10.66 18.00 18.02 10.38 2.00 68.00
## SRS_Emp_ER 17 105 18.11 8.71 17.00 17.59 7.41 3.00 46.00
## SRS_Emp_Avoid 18 105 4.99 3.47 4.00 4.59 2.97 0.00 21.00
## SRS_Emp_IR 19 105 3.08 2.79 2.00 2.66 1.48 0.00 16.00
## SRS_Emp_Same 20 105 14.28 7.66 13.00 13.56 7.41 1.00 46.00
## SRS_Emp_RM 21 105 4.70 3.62 4.00 4.34 2.97 0.00 22.00
## SQ_Tot 22 98 56.43 17.52 53.50 55.23 14.83 22.00 114.00
## range skew kurtosis se
## subj_ID* 104.00 0.00 -1.23 2.97
## sex* 1.00 -0.02 -2.02 0.05
## age 32.00 1.41 2.09 0.62
## IQ 63.50 -0.32 -0.15 1.37
## test_retest_delay 12.00 3.76 16.15 0.14
## is_imagelist_shared 1.00 1.34 -0.21 0.04
## S1_CalError_OSIE 2.47 3.77 22.93 0.03
## S2_CalError_OSIE 1.28 1.31 2.13 0.02
## Diff_CalError_OSIE 2.07 4.79 31.75 0.02
## AQ_Tot 34.00 0.76 1.14 0.66
## AQ_Soc 13.00 1.33 1.22 0.30
## AQ_Comm 8.00 1.30 1.95 0.16
## AQ_AttDet 7.00 0.12 -1.00 0.19
## SRS_Tot_Raw 142.00 1.37 3.74 2.22
## SRS_DSM5_SOC 78.00 1.20 2.41 1.32
## SRS_DSM5_RRB 66.00 1.26 3.16 1.04
## SRS_Emp_ER 43.00 0.72 0.54 0.85
## SRS_Emp_Avoid 21.00 1.39 3.25 0.34
## SRS_Emp_IR 16.00 1.90 4.77 0.27
## SRS_Emp_Same 45.00 1.10 2.06 0.75
## SRS_Emp_RM 22.00 1.37 3.65 0.35
## SQ_Tot 92.00 0.76 0.41 1.77
# tabulate sex of the included subjects
table(pheno_data_sub$sex)
##
## F M
## 52 53
# test for difference in calibration errors between session 1 and 2
tmp_df = melt(pheno_data_sub[,c("subj_ID","S1_CalError_OSIE","S2_CalError_OSIE")], id.vars = "subj_ID")
# paired t-test: session 1 vs session 2 calibration error, per subject
t_res = t.test(x = tmp_df$value[tmp_df$variable=="S1_CalError_OSIE"],
y = tmp_df$value[tmp_df$variable=="S2_CalError_OSIE"],
paired = TRUE)
t_res
##
## Paired t-test
##
## data: tmp_df$value[tmp_df$variable == "S1_CalError_OSIE"] and tmp_df$value[tmp_df$variable == "S2_CalError_OSIE"]
## t = 0.0038543, df = 104, p-value = 0.9969
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06082010 0.06105698
## sample estimates:
## mean of the differences
## 0.000118441
# Build one long MATLAB command string and execute it via matlabr.
# The MATLAB code: (1) loads the grouped similarity analysis results,
# (2) symmetrizes each per-stimulus subject x subject identification matrix,
# (3) takes each subject's median across stimuli, and (4) writes subject x
# subject csv matrices -- one for all stimuli and one per semantic feature.
code2run = sprintf("cd %s", codepath)
code2run = sprintf("%s; resultpath = '%s'",code2run, resultpath)
code2run = sprintf("%s; load('%s')",code2run, file.path(resultpath, "weight_none_gsa_res.mat"))
code2run = sprintf("%s; id_mat = gsa_res.id_mat; id_mat_symm = id_mat; tmp_mat = squeeze(id_mat(:,:,1)); mask = tril(tmp_mat)~=0; for istim = 1:size(id_mat,3); tmp_mat = squeeze(id_mat(:,:,istim)); tmp_mat_rev = tmp_mat'; tmp_mat2use = tmp_mat; tmp_mat2use(mask) = tmp_mat_rev(mask); id_mat_symm(:,:,istim) = tmp_mat2use; end; id_mat = id_mat_symm; tmp_ct = nan(size(id_mat,1),size(id_mat,2)); for isub = 1:size(id_mat,1); tmp_mat = squeeze(id_mat(isub,:,:))'; tmp_ct(isub,:) = nanmedian(tmp_mat,1); end; data2use = tmp_ct; tab2write = cell2table([gsa_res.subids, num2cell(data2use)], 'VariableNames',[{'subid'},gsa_res.subids']); file2write = fullfile(resultpath,'weight_none_id_mat_all.csv'); writetable(tab2write, file2write)
",code2run)
code2run = sprintf("%s; semantic_features = readtable('%s')", code2run, file.path(resultpath, "semantic_features.csv"))
code2run = sprintf("%s; features2use = semantic_features.Properties.VariableNames(3:end)", code2run)
code2run = sprintf("%s; for i=1:length(features2use); curr_feature = features2use{i}; disp(curr_feature); feature_mask = semantic_features.(curr_feature)==1; tmp_ct = nan(size(id_mat,1),size(id_mat,2)); for isub = 1:size(id_mat,1); tmp_mat = squeeze(id_mat(isub,:,feature_mask))'; tmp_ct(isub,:) = nanmedian(tmp_mat,1); end; data2use = tmp_ct; tab2write = cell2table([gsa_res.subids, num2cell(data2use)], 'VariableNames',[{'subid'},gsa_res.subids']); file2write = fullfile(resultpath,['weight_none_id_mat_',curr_feature,'.csv']); writetable(tab2write, file2write); end; exit", code2run)
# runs MATLAB as a subprocess; requires options(matlab.path=...) set above
res = run_matlab_code(code2run)
# heatmap of semantic feature matrix to see which semantic features are usually co-occuring
df_sem_feat = as.matrix(semantic_features_annot[,2:ncol(semantic_features_annot)])
# Ward linkage on binary distances; silhouette index used by NbClust to pick k
clust_method = "ward.D2"
dist_method = "binary"
nbc_index = "silhouette"
# cluster stimuli (rows of the feature matrix)
stim_feat_res = NbClust(data = as.matrix(df_sem_feat),
index = nbc_index,
distance = dist_method,
method = clust_method,
min.nc=2, max.nc=6)
k_stim = stim_feat_res$Best.nc
# one color per stimulus, keyed by its cluster assignment
row_cols = brewer.pal(k_stim[[1]], "Set1")[stim_feat_res$Best.partition]
# add clustering solution to semantic_features_annot
cluster_df = data.frame(stimulus = factor(semantic_features_annot$stimulus),
cluster = factor(stim_feat_res$Best.partition))
semantic_features_annot = merge(cluster_df,semantic_features_annot[, c("stimulus",semantic_features)], by = "stimulus")
# cluster semantic features (columns, so transpose the matrix)
sem_feat_res = NbClust(data = as.matrix(t(df_sem_feat)),
index = nbc_index,
distance = dist_method,
method = clust_method,
min.nc=2, max.nc=6)
k_sem_feat = sem_feat_res$Best.nc
col_cols = brewer.pal(k_sem_feat[[1]], "Set1")[sem_feat_res$Best.partition]
# make heatmap with cluster-colored side bars for stimuli (rows) and features (columns)
stim_hmap_res = heatmap(as.matrix(df_sem_feat),
hclustfun=function(d) hclust(d, method=clust_method),
scale = "none",
RowSideColors=row_cols,
ColSideColors=col_cols)
# row order of the dendrogram, for downstream reordering
stim_order_dendrogram = stim_hmap_res$rowInd
stim_hmap_res
## $rowInd
## [1] 298 508 675 128 683 24 414 7 259 307 643 526 38 452 57 31 129 496
## [19] 208 76 54 19 73 264 37 594 9 82 122 635 622 551 507 375 214 175
## [37] 202 528 193 68 8 349 672 311 473 273 80 231 599 490 299 399 276 198
## [55] 564 404 377 123 12 97 598 515 174 424 519 645 166 87 337 179 699 625
## [73] 489 417 324 322 316 309 171 211 652 541 29 488 293 403 280 654 353 116
## [91] 314 591 655 476 266 134 230 471 323 251 245 199 92 91 5 16 487 659
## [109] 658 423 192 30 151 677 554 93 250 317 67 15 35 595 410 40 234 426
## [127] 262 416 644 315 21 43 272 137 49 1 26 687 664 653 607 559 557 523
## [145] 362 334 305 281 152 239 449 347 62 243 104 690 98 36 86 79 691 673
## [163] 556 548 499 384 105 109 604 671 651 458 363 328 209 120 153 638 624 383
## [181] 492 670 99 144 133 393 456 391 149 387 674 428 235 256 369 612 469 14
## [199] 275 520 678 400 589 581 65 118 642 306 371 665 660 534 634 143 667 381
## [217] 210 247 288 516 656 270 44 246 696 17 447 646 22 165 356 374 341 90
## [235] 390 279 61 150 662 558 261 457 578 613 583 350 479 52 530 700 682 420
## [253] 146 163 265 623 605 584 572 411 402 348 228 115 85 3 72 681 666 112
## [271] 318 81 278 629 609 582 346 302 289 25 84 514 320 453 693 58 639 69
## [289] 248 689 533 330 292 41 139 597 335 697 333 425 686 577 465 232 359 148
## [307] 552 434 477 161 580 373 60 71 310 421 63 100 661 332 647 257 611 159
## [325] 140 561 506 329 180 45 110 200 154 540 370 562 650 549 167 537 531 444
## [343] 338 282 156 147 354 158 679 585 368 263 698 503 4 379 602 657 430 569
## [361] 441 626 429 475 188 297 119 177 627 88 186 521 74 249 380 252 269 633
## [379] 603 482 408 191 244 571 542 497 450 419 212 83 203 308 355 344 205 481
## [397] 361 593 445 461 553 610 227 178 187 409 576 398 543 418 570 685 529 692
## [415] 579 459 588 586 493 467 451 385 366 378 242 285 641 46 164 630 592 454
## [433] 241 224 184 27 172 535 443 422 219 47 55 640 460 213 206 94 101 437
## [451] 201 33 225 486 223 621 412 494 518 168 466 560 357 616 319 448 107 325
## [469] 608 614 401 547 326 303 301 59 233 573 483 238 162 131 114 121 480 204
## [487] 536 505 512 555 485 498 550 501 502 565 382 649 510 491 440 389 395 48
## [505] 70 376 522 567 406 468 255 260 436 364 258 226 56 194 296 511 439 539
## [523] 372 394 669 78 217 396 504 221 397 111 196 351 185 50 222 636 606 600
## [541] 566 478 474 435 343 294 220 197 176 155 10 18 574 431 283 32 267 568
## [559] 253 271 313 618 601 538 53 321 500 632 495 472 470 463 438 427 415 342
## [577] 284 268 181 13 160 587 290 173 237 102 563 336 513 631 388 527 77 525
## [595] 2 545 637 6 215 145 157 64 95 170 11 130 620 517 433 89 358 345
## [613] 142 339 446 442 125 386 340 432 124 367 464 141 182 254 407 405 676 195
## [631] 286 277 392 413 546 240 103 138 695 236 229 66 51 360 189 365 617 615
## [649] 663 352 532 484 509 619 304 42 596 108 218 524 127 327 680 291 300 668
## [667] 106 113 96 117 688 648 455 183 287 590 544 190 216 462 207 136 331 132
## [685] 575 694 34 684 126 295 628 23 39 312 135 169 75 274 28 20
##
## $colInd
## [1] 1 10 14 3 4 5 7 9 16 6 8 12 11 2 15 13
##
## $Rowv
## NULL
##
## $Colv
## NULL
# summarize stimulus-cluster sizes and map each cluster label to its plotting color
cluster_res = data.frame(clust_labels = stim_feat_res$Best.partition, clust_colors = row_cols)
ctab = as.data.frame(table(cluster_res$clust_labels))
for (i in seq_len(k_stim[[1]])){
# translate the hex color of cluster i into the nearest named R color
tmp_col = as.character(unique(cluster_res$clust_colors[cluster_res$clust_labels==i]))
ctab$color[i] = plotrix::color.id(tmp_col)
# cluster number (equals i by construction; kept as an explicit column)
ctab$clust_num[i] = i
}
ctab
## Var1 Freq color clust_num
## 1 1 158 firebrick2 1
## 2 2 542 steelblue 2
# count fingerprintable stimuli per subject within each stimulus cluster
nfpstim_per_subject$c1 = rowSums(classifier_output_sub_by_stim[,cluster_res$clust_labels==1])
nfpstim_per_subject$c2 = rowSums(classifier_output_sub_by_stim[,cluster_res$clust_labels==2])
# get sum of stimuli within semantic clusters and plot as a word cloud
sem_feat_cols = unique(col_cols)
stim_cols = unique(row_cols)
# function to do gaze fingerprint classification
#
# Reads a subject x subject similarity matrix from file2use (csv with subject
# ids as row names in the first column), drops subjects flagged by na_mask,
# and classifies each subject as correctly fingerprinted when the maximum of
# their row falls on the diagonal (most similar to themselves).
#
# Args:
#   file2use: path to the subject x subject similarity csv
#   na_mask:  logical vector; TRUE = exclude that subject (rows AND columns)
#   nperm:    number of row-shuffling permutations for the null distribution;
#             0 skips the permutation test (mean_null and pval stay NA)
#
# Returns: 1-row data.frame with columns accuracy, mean_null, pval
gaze_fingerprint_classifier <- function(file2use, na_mask, nperm=0){
  tmp_data = read.csv(file2use, row.names=1)
  tmp_data = tmp_data[!na_mask,!na_mask]
  nsub = nrow(tmp_data)
  tmp_res = matrix(nrow = nsub, ncol=3)
  for (isub in seq_len(nsub)){
    tmp_res[isub,1] = isub
    # which.max is robust where which(x == max(x)) is not: it returns a single
    # index on ties (the first) and ignores NAs instead of erroring out
    tmp_res[isub,2] = which.max(as.numeric(tmp_data[isub,]))
  }
  # correct classification = row maximum on the diagonal
  tmp_res[,3] = tmp_res[,1]==tmp_res[,2]
  accuracy = sum(tmp_res[,3])/nsub
  result = data.frame(matrix(nrow = 1, ncol = 3))
  colnames(result) = c("accuracy", "mean_null", "pval")
  result$accuracy = accuracy
  if (nperm>0){
    tmp_perm_res = data.frame(matrix(nrow = nperm, ncol = 2))
    colnames(tmp_perm_res) = c("perm_num","accuracy")
    # loop-invariant: subject ids never change across permutations
    subids = rownames(tmp_data)
    for (iperm in 1:nperm){
      # shuffle the row order; seeded per permutation for reproducibility
      set.seed(iperm)
      rand_perm = sample(length(subids))
      perm_tmp_data = tmp_data[subids[rand_perm],]
      perm_tmp_res = matrix(nrow = nsub, ncol=3)
      for (isub in seq_len(nsub)){
        perm_tmp_res[isub,1] = isub
        perm_tmp_res[isub,2] = which.max(as.numeric(perm_tmp_data[isub,]))
      }
      perm_tmp_res[,3] = perm_tmp_res[,1]==perm_tmp_res[,2]
      perm_accuracy = sum(perm_tmp_res[,3])/nsub
      tmp_perm_res[iperm, "perm_num"] = iperm
      tmp_perm_res[iperm, "accuracy"] = perm_accuracy
    }
    # permutation p-value with +1 correction so p is never exactly 0
    p_value = (sum(tmp_perm_res$accuracy>=accuracy)+1)/(nperm+1)
    result$pval = p_value
    result$mean_null = mean(tmp_perm_res$accuracy)
  }
  return(result)
} # function gaze_fingerprint_classifier
# subjects to remove because of too many NAs (>15% of the stimuli missing)
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)
# run the fingerprint classifier on the all-stimuli similarity matrix and on
# each semantic-feature-specific matrix written out by the MATLAB step
features2use = c("all", semantic_features)
gfp_res = data.frame(matrix(nrow = length(features2use), ncol = 3))
colnames(gfp_res) = c("accuracy", "mean_null", "pval")
rownames(gfp_res) = features2use
for (ifeature in seq_along(features2use)){
file2use = file.path(resultpath,sprintf("weight_%s_id_mat_%s.csv",WEIGHT, features2use[ifeature]))
gfp_res[ifeature,] = gaze_fingerprint_classifier(file2use, na_mask, nperm=nperm)
}
gfp_res
## accuracy mean_null pval
## all 0.5238095 0.009609524 0.000999001
## text 0.5047619 0.009257143 0.000999001
## face 0.4952381 0.009228571 0.000999001
## emotion 0.3904762 0.009380952 0.000999001
## sound 0.2952381 0.009419048 0.000999001
## smell 0.4857143 0.009533333 0.000999001
## taste 0.5428571 0.009704762 0.000999001
## touch 0.4857143 0.009495238 0.000999001
## motion 0.4666667 0.009238095 0.000999001
## operability 0.5428571 0.009485714 0.000999001
## watchability 0.5238095 0.009314286 0.000999001
## touched 0.4857143 0.009228571 0.000999001
## gazed 0.4666667 0.009419048 0.000999001
## human 0.4857143 0.009390476 0.000999001
## animal 0.3904762 0.009800000 0.000999001
## social 0.4857143 0.009257143 0.000999001
## nonsocial 0.4666667 0.009523810 0.000999001
# Assemble the per-feature accuracies for plotting and tag each semantic
# feature with the name and hex color of the feature cluster it belongs to.
data2plot = gfp_res
data2plot$feature = rownames(gfp_res)
data2plot = data2plot %>% filter(!feature=="all")
data2plot$cluster = NA
red_clust = c("text","watchability")
yellow_clust = c("animal")
green_clust = c("emotion","sound")
purple_clust = c("smell","operability","nonsocial","taste")
blue_clust = c("human","touched","social","face","motion","gazed")
orange_clust = c("touch")
# table of cluster name -> (member features, hex color); one pass over this
# table replaces the repeated per-cluster assignment statements
cluster_defs = list(red = list(red_clust, "#E41A1C"),
                    blue = list(blue_clust, "#377EB8"),
                    green = list(green_clust, "#4DAF4A"),
                    purple = list(purple_clust, "#984EA3"),
                    orange = list(orange_clust, "#FF7F00"),
                    yellow = list(yellow_clust, "#FFFF33"))
for (cl_name in names(cluster_defs)){
  in_cl = is.element(data2plot$feature, cluster_defs[[cl_name]][[1]])
  data2plot$cluster[in_cl] = cl_name
  data2plot$cluster_color[in_cl] = cluster_defs[[cl_name]][[2]]
}
data2plot$cluster = factor(data2plot$cluster)
# fixed display order of the features on the y axis
data2plot$feature = factor(data2plot$feature, levels = rev(c("taste","operability","watchability","text","face","smell","touch","touched","human","social","motion","gazed","nonsocial","emotion","animal","sound")))
# load in bootstrap accuracy for all stimuli
bootaccall = read.csv(file.path(resultpath,"bootstrap_accuracy_95CIs.csv"))
# bar plot of per-feature fingerprinting accuracy; solid line = all-stimuli
# accuracy, dashed lines = its bootstrap 95% CI
p = ggplot(data = data2plot, aes(y = feature, x = accuracy, fill = cluster)) + geom_bar(stat = "identity") + geom_vline(xintercept=gfp_res["all","accuracy"]) + geom_vline(xintercept=bootaccall[1,"low95"], linetype = "longdash") + geom_vline(xintercept=bootaccall[1,"hi95"], linetype = "longdash") + scale_fill_manual(values = c("#377EB8","#4DAF4A","#FF7F00","#984EA3","#E41A1C","#FFFF33")) + ylab("Semantic Feature") + xlab("Accuracy") + coord_cartesian(xlim=c(0.25,0.80)) + guides(fill="none") + ggtitle(plot_title_name) + easy_center_title()
# pass the plot explicitly: without plot=, ggsave() saves the last *printed*
# plot, and p has not been printed yet at this point
ggsave(filename = file.path(plot_path, "fingerprint_accuracy_global_semantic_features.pdf"),
plot = p, width = 4, height = 5)
p
# word cloud with word size scaled by accuracy (^12 exaggerates the differences)
wordcloud(words = data2plot$feature,
freq = data2plot$accuracy^12,
random.order = FALSE,
rot.per=0,
colors=data2plot$cluster_color,
ordered.colors=TRUE)
# Per-stimulus classification accuracy versus the permutation null
df_res = classifier_output_sub_by_stim
df_res = df_res[!na_mask,]
df_res_perm = classifier_output_stim_by_perm
# number of permutations = number of columns in the permutation results
nperm = ncol(df_res_perm)
subs2use = rownames(df_res)
df2plot = data.frame(stim_names = stim_names,
stim_ids = stim_ids,
accuracy = colSums(df_res[,stim_ids])/dim(df_res)[1],
site="IIT")
df2plot$site = factor(df2plot$site)
# calculate p-values based on permutation accuracies (+1 correction so p > 0)
for (istim in seq_len(nstimuli)){
df2plot$pval[istim] = (sum(df_res_perm[istim,]>=df2plot$accuracy[istim])+1)/(nperm+1)
}
# calculate FDR
df2plot$fdr = p.adjust(df2plot$pval, method = "fdr")
# flag stimuli whose accuracy survives FDR correction as fingerprintable
df2plot$fingerprint = "No"
df2plot$fingerprint[df2plot$fdr<=fdr_thresh] = "Yes"
# attach the semantic annotations and stimulus-cluster labels
df2plot = merge(df2plot, semantic_features_annot, by.x = "stim_ids", by.y= "stimulus")
df2plot$stim_cluster_name = "C2"
df2plot$stim_cluster_name[df2plot$cluster==1] = "C1"
## Accuracy to fingerprintability plots
# make accuracy plot across all stimuli
# (guides(colour=FALSE) is deprecated in ggplot2; "none" is the supported form,
# consistent with the guides(fill="none") usage elsewhere in this script)
p1 = ggplot(data = df2plot, aes(x = fingerprint, y = accuracy, colour=fingerprint)) +
geom_jitter(width=0.1, alpha = 0.5) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75), fill=NA, colour="black") +
scale_colour_manual(values = c("orange","dodger blue")) + guides(colour="none") +
xlab("Fingerprintable") +
ylab("Accuracy") + ggtitle(plot_title_name) + easy_center_title()
# make accuracy plot, but color according to stimulus cluster
p2 = ggplot(data = df2plot, aes(x = stim_cluster_name, y = accuracy, colour=stim_cluster_name)) +
geom_jitter(width=0.1, alpha = 0.5) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75), fill=NA, colour="black") +
guides(colour="none") + scale_colour_manual(values = c("#E41A1C","#377EB8")) +
xlab("Stimulus Cluster") +
ylab("Accuracy") + ggtitle(plot_title_name) + easy_center_title()
# combine the two panels side by side with patchwork
p_final = p1+p2
p_final
# tabulate number of stimuli where fingerprinting is possible or not
table(df2plot$fingerprint)
##
## No Yes
## 40 660
# what percentage of the 700 stimuli allow us to gaze fingerprint?
sum(df2plot$fingerprint=="Yes")/nstimuli
## [1] 0.9428571
# tabulate number of stimuli where fingerprinting is possible or not, by stimulus cluster
table(df2plot$fingerprint, df2plot$stim_cluster_name)
##
## C1 C2
## No 13 27
## Yes 145 515
# chi-square test on that fingerprint by stimulus cluster contingency table
chisq.test(table(df2plot$fingerprint, df2plot$stim_cluster_name))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(df2plot$fingerprint, df2plot$stim_cluster_name)
## X-squared = 1.8283, df = 1, p-value = 0.1763
# t-test comparing accuracy of C2 vs C1 stimuli
t.test(df2plot$accuracy[df2plot$stim_cluster_name=="C2"], df2plot$accuracy[df2plot$stim_cluster_name=="C1"])
##
## Welch Two Sample t-test
##
## data: df2plot$accuracy[df2plot$stim_cluster_name == "C2"] and df2plot$accuracy[df2plot$stim_cluster_name == "C1"]
## t = 4.4543, df = 282.65, p-value = 1.214e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.004413837 0.011403721
## sample estimates:
## mean of x mean of y
## 0.04781234 0.03990356
# Cohen's d effect size for accuracy of C2 vs C1 stimuli
# NOTE(review): cohens_d() is not base/psych -- presumably defined in utils.R; confirm
dres = cohens_d(df2plot$accuracy[df2plot$stim_cluster_name=="C2"], df2plot$accuracy[df2plot$stim_cluster_name=="C1"]); dres
## [1] 0.3778695
# Accuracies across all semantic feature categories:
# mean/sd/sem of the per-stimulus accuracies among stimuli tagged with each feature
cols2use = c("feature","accuracy","std","sem")
result_df = data.frame(matrix(nrow = length(semantic_features), ncol = length(cols2use)))
rownames(result_df) = semantic_features
colnames(result_df) = cols2use
for (sem_feat in semantic_features){
mask = semantic_features_annot[sem_feat]==1
stims2use = stim_ids[mask]
# per-stimulus accuracy across subjects for this feature's stimuli;
# computed once (the original recomputed the same colSums three times)
stim_acc = colSums(df_res[,stims2use])/nrow(df_res)
result_df[sem_feat,"feature"] = sem_feat
result_df[sem_feat,"accuracy"] = mean(stim_acc)
result_df[sem_feat,"std"] = sd(stim_acc)
result_df[sem_feat,"sem"] = sd(stim_acc)/sqrt(sum(mask))
}
# Fingerprint Ratios across all semantic feature categories:
# for every subject, average the fingerprint ratio over the stimuli
# belonging to each semantic feature.
cols2use = semantic_features
result_df_fpr = data.frame(matrix(nrow = nrow(fpratio_sub_by_stim), ncol = length(semantic_features)))
rownames(result_df_fpr) = rownames(fpratio_sub_by_stim)
colnames(result_df_fpr) = cols2use
for (feat in semantic_features){
feat_stims = stim_ids[semantic_features_annot[feat]==1]
result_df_fpr[,feat] = rowMeans(fpratio_sub_by_stim[,feat_stims], na.rm= TRUE)
}
result_df_fpr$subid = rownames(result_df_fpr)
# reshape to long format for plotting
df2plot = melt(result_df_fpr, id.vars = "subid")
# order features by their accuracy for plotting
result_df$feature = with(result_df, reorder(feature, accuracy))
# attach the feature-cluster labels from the NbClust feature clustering
feature_clusters = data.frame(feature = names(sem_feat_res$Best.partition), cluster = sem_feat_res$Best.partition)
result_df = merge(result_df, feature_clusters, by = "feature")
result_df$cluster = factor(result_df$cluster)
result_df
## feature accuracy std sem cluster
## 1 animal 0.04594929 0.02199788 0.0017726400 6
## 2 emotion 0.05136054 0.02124055 0.0017951541 3
## 3 face 0.04800292 0.02147595 0.0009408784 2
## 4 gazed 0.04863946 0.02146720 0.0015333715 2
## 5 human 0.04851564 0.02141948 0.0009797042 2
## 6 motion 0.04591730 0.02088212 0.0011691743 2
## 7 nonsocial 0.03990356 0.01908252 0.0015181242 4
## 8 operability 0.04485714 0.02006360 0.0014187105 4
## 9 smell 0.04393241 0.02238083 0.0023207841 4
## 10 social 0.04876274 0.02142316 0.0010010388 2
## 11 sound 0.04839969 0.01750025 0.0022406774 3
## 12 taste 0.04685917 0.02307555 0.0016829578 4
## 13 text 0.04889663 0.02156858 0.0013751624 1
## 14 touch 0.04217010 0.02008151 0.0014164406 5
## 15 touched 0.04902153 0.02146839 0.0012563426 2
## 16 watchability 0.04688462 0.02097272 0.0010460241 1
# bar plot of mean accuracy per semantic feature with SEM error bars,
# colored by feature cluster
p = ggplot(data = result_df, aes(y = feature, x = accuracy, fill = cluster)) +
geom_bar(stat = "identity") +
geom_errorbar(aes(xmin=accuracy-sem, xmax=accuracy+sem), width=0.2) +
guides(fill="none") +
coord_cartesian(xlim = c(0.035, 0.09)) +
xlab("Accuracy") + ylab("Semantic Feature") +
scale_fill_manual(values = unique(col_cols)) + ggtitle(plot_title_name) + easy_center_title()
# pass the plot explicitly: without plot=, ggsave() saves the last *printed*
# plot, and p has not been printed yet at this point
ggsave(filename = file.path(plot_path, "fingerprint_accuracy_local_semantic_features.pdf"),
plot = p, width = 4, height = 5)
p
## Subject-level fingerprintability: does each subject have more
## fingerprintable stimuli than expected by chance?
fp_res = nfpstim_per_subject
fp_res$subids = rownames(fp_res)
fp_perm_res = nfpstim_sub_by_perm
# subjects to remove because of too many NAs (>15% of the stimuli missing)
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)
fp_res = fp_res %>% filter(!na_mask)
fp_perm_res = fp_perm_res %>% filter(!na_mask)
# figure out p-values for each subject based on permutation nfingerprintable stim
for (isub in seq_len(nrow(fp_res))){
fp_res$pval[isub] = (sum(fp_perm_res[isub,]>=fp_res$nfingerprintable_stimuli[isub])+1)/(nperm+1)
}
fp_res$fdr = p.adjust(fp_res$pval, method = "fdr")
fp_res$Fingerprintable = "Yes"
fp_res$Fingerprintable[fp_res$fdr>fdr_thresh] = "No"
# rows of fp_perm_res align with fp_res (same subjects, same na_mask filter),
# so indexing fp_perm_res by fp_res$fdr here is intentional
fp_perm_res$Fingerprintable = "Yes"
fp_perm_res$Fingerprintable[fp_res$fdr>fdr_thresh] = "No"
# subjects who have significantly more nfingerprintable stimuli than expected by chance
fp_subs2include = fp_res$subids[fp_res$fdr<=fdr_thresh]
print(sprintf("%d subjects of %d with statistically significant number of fingerprintable stimuli",
length(fp_subs2include),
dim(fp_res)[1]))
## [1] "96 subjects of 105 with statistically significant number of fingerprintable stimuli"
# subjects whose nfingerprintable stimuli is no better than chance
fp_subs2exclude = fp_res$subids[fp_res$fdr>fdr_thresh]
print(sprintf("%d subjects of %d with non-statistically significant number of fingerprintable stimuli",
length(fp_subs2exclude),
dim(fp_res)[1]))
## [1] "9 subjects of 105 with non-statistically significant number of fingerprintable stimuli"
# sort subjects by their (weighted) number of fingerprintable stimuli
reorder_vect = order(fp_res[,"nfingerprintable_stimuli"])
ordered_fp_res_subs = rownames(fp_res)[reorder_vect]
ordered_fp_res = fp_res[ordered_fp_res_subs,]
ordered_fp_res
## nfingerprintable_stimuli c1 c2 subids pval fdr
## a8681Py534 0 0 0 a8681Py534 1.000000000 1.00000000
## hvu5113B2r 3 0 3 hvu5113B2r 0.591408591 0.59709521
## j51h15E4xv 7 2 5 j51h15E4xv 0.119880120 0.12220789
## nxaQkud83t 8 3 5 nxaQkud83t 0.082917083 0.08620093
## Wj74c75671 8 1 7 Wj74c75671 0.055944056 0.05933460
## M3go34rg95 9 3 6 M3go34rg95 0.035964036 0.03933566
## T1358ru144 10 4 6 T1358ru144 0.006993007 0.01184300
## Yjkv6qz9d1 10 1 10 Yjkv6qz9d1 0.020979021 0.02394345
## b9u81qz6G5 11 2 9 b9u81qz6G5 0.088911089 0.09152612
## P9a579it51 12 4 8 P9a579it51 0.046953047 0.05082546
## I89n948112 13 3 12 I89n948112 0.056943057 0.05979021
## b57665B367 13 3 11 b57665B367 0.029970030 0.03347716
## aadbq388Ob 13 2 11 aadbq388Ob 0.032967033 0.03643725
## b5F118a654 13 3 11 b5F118a654 0.012987013 0.01585624
## b1l2a37p3Z 13 5 9 b1l2a37p3Z 0.049950050 0.05351791
## o77M6w8m26 15 4 13 o77M6w8m26 0.009990010 0.01344809
## ytbukz3wX4 16 4 12 ytbukz3wX4 0.010989011 0.01373626
## Fljc85x4j4 16 4 13 Fljc85x4j4 0.000999001 0.01184300
## tu2b9S1ms9 16 4 12 tu2b9S1ms9 0.010989011 0.01373626
## vhs2B895gm 16 3 13 vhs2B895gm 0.014985015 0.01767895
## okj19fo9Fh 16 4 12 okj19fo9Fh 0.003996004 0.01184300
## fgJ6xw7668 17 4 14 fgJ6xw7668 0.006993007 0.01184300
## u17w5462Em 18 4 14 u17w5462Em 0.009990010 0.01344809
## ewv9511u6J 18 3 15 ewv9511u6J 0.002997003 0.01184300
## hy2wC38x85 18 5 13 hy2wC38x85 0.002997003 0.01184300
## z4wsu3Z4l2 19 2 17 z4wsu3Z4l2 0.009990010 0.01344809
## wG7615jxji 19 4 15 wG7615jxji 0.005994006 0.01184300
## c2i1N145o5 19 4 15 c2i1N145o5 0.006993007 0.01184300
## I28284j41f 19 3 16 I28284j41f 0.004995005 0.01184300
## ggr1delQ9y 19 5 15 ggr1delQ9y 0.010989011 0.01373626
## uw968g1F9o 19 3 16 uw968g1F9o 0.002997003 0.01184300
## x5Ny88741k 19 2 17 x5Ny88741k 0.007992008 0.01291017
## x75646ye9T 20 6 14 x75646ye9T 0.005994006 0.01184300
## j6r171siTa 20 10 10 j6r171siTa 0.002997003 0.01184300
## z4z93Q7hs2 21 4 17 z4z93Q7hs2 0.014985015 0.01767895
## y9s1dvTq2q 21 3 18 y9s1dvTq2q 0.003996004 0.01184300
## ox17xwBqy6 21 7 14 ox17xwBqy6 0.008991009 0.01344809
## S91d18oh3x 21 3 20 S91d18oh3x 0.006993007 0.01184300
## p15y46v54P 22 0 22 p15y46v54P 0.002997003 0.01184300
## gJuiz877gn 22 9 14 gJuiz877gn 0.006993007 0.01184300
## xj3R2c13ha 22 3 19 xj3R2c13ha 0.002997003 0.01184300
## neCz5h3c81 22 2 20 neCz5h3c81 0.000999001 0.01184300
## jmHk8186fv 22 5 17 jmHk8186fv 0.006993007 0.01184300
## m3837o9wA8 23 3 20 m3837o9wA8 0.003996004 0.01184300
## dKfyt77oq4 24 3 21 dKfyt77oq4 0.006993007 0.01184300
## Zd122o2d96 24 5 19 Zd122o2d96 0.009990010 0.01344809
## s145O1udx1 25 5 21 s145O1udx1 0.020979021 0.02394345
## Opb6k5eke1 25 4 21 Opb6k5eke1 0.024975025 0.02819761
## tx43oiW5ps 25 3 22 tx43oiW5ps 0.005994006 0.01184300
## b83u91R6q5 25 2 23 b83u91R6q5 0.001998002 0.01184300
## x1J5788214 25 6 20 x1J5788214 0.002997003 0.01184300
## M97oq2u9z2 25 2 25 M97oq2u9z2 0.008991009 0.01344809
## ax5I99lf78 26 2 24 ax5I99lf78 0.001998002 0.01184300
## e813o11n7T 27 8 19 e813o11n7T 0.004995005 0.01184300
## j1218Kcj5g 28 6 23 j1218Kcj5g 0.006993007 0.01184300
## kju391wN3s 28 9 19 kju391wN3s 0.004995005 0.01184300
## p6o2qe1xQt 29 4 25 p6o2qe1xQt 0.006993007 0.01184300
## qixzE71638 30 5 25 qixzE71638 0.007992008 0.01291017
## jbypJkj3a8 31 3 28 jbypJkj3a8 0.010989011 0.01373626
## uxxiod4U24 32 6 30 uxxiod4U24 0.003996004 0.01184300
## tb51gQ2f32 32 10 22 tb51gQ2f32 0.001998002 0.01184300
## k7ifvzH4yx 32 7 25 k7ifvzH4yx 0.009990010 0.01344809
## yzfoH84397 33 9 25 yzfoH84397 0.003996004 0.01184300
## b1Oe7158q9 33 7 27 b1Oe7158q9 0.004995005 0.01184300
## jT2r93k9m8 34 7 29 jT2r93k9m8 0.013986014 0.01687967
## Yoea9j7gg5 34 3 31 Yoea9j7gg5 0.006993007 0.01184300
## wUe1ha8l71 35 7 28 wUe1ha8l71 0.004995005 0.01184300
## Valj2s14g8 37 10 27 Valj2s14g8 0.007992008 0.01291017
## aSzdcei682 37 6 31 aSzdcei682 0.009990010 0.01344809
## wfvbs123Wn 37 9 29 wfvbs123Wn 0.010989011 0.01373626
## a34K18e29d 38 5 33 a34K18e29d 0.008991009 0.01344809
## c192S19qf7 40 15 28 c192S19qf7 0.002997003 0.01184300
## q241I76nd4 40 7 33 q241I76nd4 0.000999001 0.01184300
## yr8lL9pd54 41 9 32 yr8lL9pd54 0.004995005 0.01184300
## wc8b25ckoY 41 11 30 wc8b25ckoY 0.009990010 0.01344809
## y7g3x8266T 42 9 34 y7g3x8266T 0.008991009 0.01344809
## hB3nx98zae 42 7 35 hB3nx98zae 0.005994006 0.01184300
## P736124up6 44 8 38 P736124up6 0.001998002 0.01184300
## tn95dVvyfq 44 10 34 tn95dVvyfq 0.010989011 0.01373626
## y779g7Rjjm 44 8 36 y779g7Rjjm 0.006993007 0.01184300
## nI83m4382t 44 6 44 nI83m4382t 0.004995005 0.01184300
## u44Dr4gs86 45 8 37 u44Dr4gs86 0.001998002 0.01184300
## Z8m99686m1 46 9 41 Z8m99686m1 0.004995005 0.01184300
## yc3hB2f247 46 8 38 yc3hB2f247 0.002997003 0.01184300
## ddf1bWbguf 46 10 37 ddf1bWbguf 0.001998002 0.01184300
## f59k8Ac558 48 11 38 f59k8Ac558 0.003996004 0.01184300
## G62a6q96v1 48 11 38 G62a6q96v1 0.001998002 0.01184300
## N8hwg5979i 49 10 39 N8hwg5979i 0.005994006 0.01184300
## r76lKaa359 49 10 39 r76lKaa359 0.006993007 0.01184300
## i34257K5o6 50 11 39 i34257K5o6 0.005994006 0.01184300
## gc7955R7k8 51 11 40 gc7955R7k8 0.003996004 0.01184300
## v26h929uK8 53 10 43 v26h929uK8 0.006993007 0.01184300
## nJ334736dc 53 11 42 nJ334736dc 0.003996004 0.01184300
## niz32f6icK 56 13 47 niz32f6icK 0.006993007 0.01184300
## Rbu59l2m2e 57 16 42 Rbu59l2m2e 0.009990010 0.01344809
## e1n58k41tM 57 7 50 e1n58k41tM 0.005994006 0.01184300
## eij9G9b2l2 59 14 45 eij9G9b2l2 0.012987013 0.01585624
## j79z6Cf128 63 12 56 j79z6Cf128 0.008991009 0.01344809
## hli575Zm3a 64 12 53 hli575Zm3a 0.006993007 0.01184300
## Zh77t5831g 65 10 55 Zh77t5831g 0.004995005 0.01184300
## M19lz3o828 68 18 50 M19lz3o828 0.016983017 0.01981352
## Rw77rshc7y 70 10 60 Rw77rshc7y 0.006993007 0.01184300
## q7u25p4rM7 76 9 67 q7u25p4rM7 0.002997003 0.01184300
## tqH9bkdr78 85 12 73 tqH9bkdr78 0.006993007 0.01184300
## Xa376ga9a3 89 13 76 Xa376ga9a3 0.005994006 0.01184300
## Fingerprintable
## a8681Py534 No
## hvu5113B2r No
## j51h15E4xv No
## nxaQkud83t No
## Wj74c75671 No
## M3go34rg95 Yes
## T1358ru144 Yes
## Yjkv6qz9d1 Yes
## b9u81qz6G5 No
## P9a579it51 No
## I89n948112 No
## b57665B367 Yes
## aadbq388Ob Yes
## b5F118a654 Yes
## b1l2a37p3Z No
## o77M6w8m26 Yes
## ytbukz3wX4 Yes
## Fljc85x4j4 Yes
## tu2b9S1ms9 Yes
## vhs2B895gm Yes
## okj19fo9Fh Yes
## fgJ6xw7668 Yes
## u17w5462Em Yes
## ewv9511u6J Yes
## hy2wC38x85 Yes
## z4wsu3Z4l2 Yes
## wG7615jxji Yes
## c2i1N145o5 Yes
## I28284j41f Yes
## ggr1delQ9y Yes
## uw968g1F9o Yes
## x5Ny88741k Yes
## x75646ye9T Yes
## j6r171siTa Yes
## z4z93Q7hs2 Yes
## y9s1dvTq2q Yes
## ox17xwBqy6 Yes
## S91d18oh3x Yes
## p15y46v54P Yes
## gJuiz877gn Yes
## xj3R2c13ha Yes
## neCz5h3c81 Yes
## jmHk8186fv Yes
## m3837o9wA8 Yes
## dKfyt77oq4 Yes
## Zd122o2d96 Yes
## s145O1udx1 Yes
## Opb6k5eke1 Yes
## tx43oiW5ps Yes
## b83u91R6q5 Yes
## x1J5788214 Yes
## M97oq2u9z2 Yes
## ax5I99lf78 Yes
## e813o11n7T Yes
## j1218Kcj5g Yes
## kju391wN3s Yes
## p6o2qe1xQt Yes
## qixzE71638 Yes
## jbypJkj3a8 Yes
## uxxiod4U24 Yes
## tb51gQ2f32 Yes
## k7ifvzH4yx Yes
## yzfoH84397 Yes
## b1Oe7158q9 Yes
## jT2r93k9m8 Yes
## Yoea9j7gg5 Yes
## wUe1ha8l71 Yes
## Valj2s14g8 Yes
## aSzdcei682 Yes
## wfvbs123Wn Yes
## a34K18e29d Yes
## c192S19qf7 Yes
## q241I76nd4 Yes
## yr8lL9pd54 Yes
## wc8b25ckoY Yes
## y7g3x8266T Yes
## hB3nx98zae Yes
## P736124up6 Yes
## tn95dVvyfq Yes
## y779g7Rjjm Yes
## nI83m4382t Yes
## u44Dr4gs86 Yes
## Z8m99686m1 Yes
## yc3hB2f247 Yes
## ddf1bWbguf Yes
## f59k8Ac558 Yes
## G62a6q96v1 Yes
## N8hwg5979i Yes
## r76lKaa359 Yes
## i34257K5o6 Yes
## gc7955R7k8 Yes
## v26h929uK8 Yes
## nJ334736dc Yes
## niz32f6icK Yes
## Rbu59l2m2e Yes
## e1n58k41tM Yes
## eij9G9b2l2 Yes
## j79z6Cf128 Yes
## hli575Zm3a Yes
## Zh77t5831g Yes
## M19lz3o828 Yes
## Rw77rshc7y Yes
## q7u25p4rM7 Yes
## tqH9bkdr78 Yes
## Xa376ga9a3 Yes
# Ridge plot of the permutation null distribution for the number of
# fingerprintable stimuli per subject, with the observed value marked by "*".
# Subjects are ordered by ordered_fp_res_subs (computed earlier).
fp_res$subids = factor(fp_res$subids, levels = ordered_fp_res_subs)
fp_perm_res$subids = rownames(fp_res)
fp_perm_res$subids = factor(fp_perm_res$subids, levels = ordered_fp_res_subs)
# melt perm data frame for plotting
melted_fp_perm_res = melt(fp_perm_res, id.vars = c("subids","Fingerprintable"))
# plot aesthetics
markerSize = 7
markerColor = "blue"
fontSize = 10
# make ridges for the null distribution
p = ggplot(melted_fp_perm_res, aes(x = value, y = subids, fill=Fingerprintable)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
xlab("# of Fingerprintable Stimuli") + ggtitle("# Fingerprintable Stimuli")
# add star for the actual observed value per subject
# NOTE: guides(... = "none") replaces the deprecated logical FALSE form
p = p + geom_text(data = fp_res, aes(x = nfingerprintable_stimuli, y=subids), color=markerColor, label="*",size=markerSize) +
guides(color="none", alpha="none") + ggtitle(plot_title_name) + easy_center_title() +
theme(axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize),
axis.title.x = element_text(size=fontSize),
strip.text.x = element_text(size=fontSize),
axis.title.y = element_text(size=fontSize),
plot.title = element_text(hjust = 0.5))
ggsave(p,filename = file.path(plot_path, "number_fingerprintable_stimuli_per_subject.pdf"),
width=6,height=7)
p
# make ggridges plot for gaze uniqueness index (GUI; aka fingerprint ratio)
# test each subject for whether GUI is greater than the null value of 0 after
# log10 transformation (because GUI is positively skewed). This identifies
# which subjects have an overall GUI across all stimuli greater than the null.
# drop subjects missing more than 15% of the stimuli
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)
tmp_gui = fpratio_sub_by_stim %>% filter(!na_mask)
tmp_gui = t(tmp_gui) # transpose to stimuli BY subjects
# preallocate per-subject one-sample t-test results
gui_res_sub = data.frame(matrix(nrow = dim(tmp_gui)[2], ncol = 4))
colnames(gui_res_sub) = c("subid","tstat","pval","fdr")
rownames(gui_res_sub) = colnames(tmp_gui)
subids2use = colnames(tmp_gui)
for (sub in subids2use){
gui_res_sub[sub, "subid"] = sub
# one-sample t-test of log10(GUI) against 0 (i.e., GUI against 1)
# NOTE(review): assumes GUI values are strictly positive; a zero would give
# -Inf under log10 -- confirm upstream
tres = t.test(log10(tmp_gui[,sub]), mu = 0)
gui_res_sub[sub, "tstat"] = tres$statistic[[1]]
gui_res_sub[sub, "pval"] = tres$p.value[[1]]
}
gui_res_sub$fdr = p.adjust(gui_res_sub$pval, method = "fdr")
gui_res_sub$Unique = "Yes"
# anything with tstat<0 or fdr above the threshold is not unique
gui_res_sub$Unique[gui_res_sub$tstat<0 | gui_res_sub$fdr>fdr_thresh] = "No"
gui_res_sub = gui_res_sub[order(gui_res_sub$tstat),]
gui_res_sub$subid = factor(gui_res_sub$subid, levels = gui_res_sub$subid)
table(gui_res_sub$Unique)
##
## No Yes
## 21 84
gui_res_sub = gui_res_sub[order(-gui_res_sub$tstat),]
gui_res_sub$uniqueness_rank = seq_len(dim(gui_res_sub)[1]) # make gaze uniqueness ranking based on strength of tstat
gui_res = fpratio_sub_by_stim %>% filter(!na_mask)
gui_res$subid = rownames(gui_res)
gui_res$subid = factor(gui_res$subid, levels = rev(gui_res_sub$subid))
gui_res = merge(gui_res, gui_res_sub[,c("subid","Unique")])
melted_gui_res = melt(gui_res, id.vars = c("subid","Unique"))
# make ridges plot; vertical line marks the GUI null value of 1
p = ggplot(melted_gui_res, aes(x = value, y = subid, fill=Unique)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) + xlim(-1,7) + geom_vline(xintercept = 1) +
xlab("Gaze Uniqueness Index (GUI)") + ggtitle(plot_title_name) + easy_center_title()
ggsave(p,filename = file.path(plot_path, "gaze_uniqueness_index_per_subject.pdf"),
width=6,height=7)
p
# subjects to remove because of too many NAs (>15% of stimuli missing)
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)
# compute jaccard distance matrix which is percentage of overlap in fingerprintable stimuli
jaccard_mat = as.matrix(dist(as.matrix(classifier_output_sub_by_stim), method = "binary"))
# use direct logical indexing: the original -which(na_mask) selects ZERO rows
# (not all rows) whenever na_mask contains no TRUE values
jaccard_mat = jaccard_mat[!na_mask, !na_mask]
# NOTE(review): clust_method is first assigned later in this script; this call
# appears to rely on it existing from an earlier chunk/session -- confirm
heatmap(as.matrix(jaccard_mat),
hclustfun=function(d) hclust(d, method=clust_method),
scale = "none")
# plot heatmap of classifier output matrix of subjects by stimuli matrix
tmp_data = classifier_output_sub_by_stim[!na_mask,]
heatmap(as.matrix(tmp_data),
hclustfun=function(d) hclust(d, method=clust_method),
scale = "none")
# estimate the best number of clusters via the silhouette index
nb_clust_res = NbClust(data = as.matrix(tmp_data),index = "silhouette", distance = "binary",method = "ward.D2", min.nc=1,max.nc=dim(tmp_data)[1]-1)
nb_clust_res$Best.nc
## Number_clusters Value_Index
## 104.000 0.982
# Build a subject-by-feature table of the MEDIAN fingerprint ratio (GUI) over
# the stimuli tagged with each semantic feature. NOTE: despite the "mean_"
# prefix in the variable name, medians are stored throughout.
df_sem_feat = data.frame(df_sem_feat)
sem_feat_names = colnames(df_sem_feat)
nsubs = dim(fpratio_sub_by_stim)[1]
mean_fpr_per_sem_feat_res = data.frame(matrix(nrow = nsubs, ncol = length(sem_feat_names)))
rownames(mean_fpr_per_sem_feat_res) = rownames(fpratio_sub_by_stim)
colnames(mean_fpr_per_sem_feat_res) = sem_feat_names
mean_fpr_per_sem_feat_res$all = NA
for (isub in seq_len(nsubs)){
for (isf in seq_along(sem_feat_names)){
# get median fingerprint ratio over stimuli tagged with this feature
mean_fpr_per_sem_feat_res[isub, isf] = median(as.numeric(fpratio_sub_by_stim[isub,df_sem_feat[,isf]==1]), na.rm = TRUE)
} # for (isf in seq_along(sem_feat_names)){
# median over all stimuli, and within each stimulus cluster (c1/c2)
mean_fpr_per_sem_feat_res[isub, "all"] = median(as.numeric(fpratio_sub_by_stim[isub,]), na.rm = TRUE)
mean_fpr_per_sem_feat_res[isub, "c1"] = median(as.numeric(fpratio_sub_by_stim[isub,cluster_res[,"clust_labels"]==1]), na.rm = TRUE)
mean_fpr_per_sem_feat_res[isub, "c2"] = median(as.numeric(fpratio_sub_by_stim[isub,cluster_res[,"clust_labels"]==2]), na.rm = TRUE)
} # for (isub in seq_len(nsubs)){
# Per-subject semantic-category enrichment of fingerprintable stimuli:
# for each subject and each semantic feature, test whether the subject's
# fingerprintable stimuli are over-represented in that feature category.
df_res = classifier_output_sub_by_stim
# remove subs that don't have statistically significant nfingerprintable stimuli
df_res = df_res[fp_subs2include,]
nsubs = dim(df_res)[1]
nTotal = nstimuli
# preallocate odds-ratio and p-value tables (subjects BY semantic features)
enrich_res_OR = data.frame(matrix(nrow = nsubs, ncol = length(sem_feat_names)))
colnames(enrich_res_OR) = sem_feat_names
rownames(enrich_res_OR) = rownames(df_res)
enrich_res_P = data.frame(matrix(nrow = nsubs, ncol = length(sem_feat_names)))
colnames(enrich_res_P) = sem_feat_names
rownames(enrich_res_P) = rownames(df_res)
# per-subject count of fingerprintable stimuli
nfp_res = data.frame(matrix(nrow = nsubs, ncol=1))
rownames(nfp_res) = rownames(df_res)
colnames(nfp_res) = c("nFP")
for (isub in seq_len(nsubs)){
nFP = sum(df_res[isub,])
nfp_res[isub,1] = nFP
# logical mask over stimuli: which are fingerprintable for this subject
fp_mask = df_res[isub,]==1
for (isf in seq_along(sem_feat_names)){
semfeat2use = sem_feat_names[isf]
nSemantic = sum(df_sem_feat[,semfeat2use])
nFP_given_Semantic = sum(df_sem_feat[fp_mask,semfeat2use])
# enrichmentTest is a project helper (utils.R); presumably a
# hypergeometric/Fisher-style overlap test -- confirm its definition
tmp_enrich_res = enrichmentTest(nFP_given_Semantic, nFP, nSemantic, nTotal)
enrich_res_OR[isub,semfeat2use] = tmp_enrich_res$OR
enrich_res_P[isub,semfeat2use] = tmp_enrich_res$p
} # for (isf in seq_along(sem_feat_names)){
} # for (isub in seq_len(nsubs)){
# Hierarchical clustering (Ward) of the enrichment odds-ratio matrix:
# rows = subjects, columns = semantic features.
clust_method = "ward.D2"
# candidate NbClust validity indices used to vote on the best number of clusters
nbclust_index = c("kl","ch","ccc","cindex","db","silhouette","duda","pseudot2",
"ratkowsky","ptbiserial","gap","mcclain","gamma","gplus","tau",
"sdindex")
res = NbClust(data = enrich_res_OR, method = clust_method, index = nbclust_index)
# tally each index's preferred k and take the most frequently chosen value
a = data.frame(res$Best.nc[1,]); colnames(a) = "k"; a$index = rownames(a)
best_nk = as.numeric(names(table(a$k))[table(a$k)==max(table(a$k))])
# final row (subject) partition taken from the "ch" index
res = NbClust(data = enrich_res_OR, method = clust_method, index = "ch")
row_subtype = res$Best.partition
row_cols = brewer.pal(9, "Set1")[res$Best.partition]
# repeat the vote for columns (semantic features); "ccc" omitted here
nbclust_index = c("kl","ch","cindex","db","silhouette","duda","pseudot2",
"ratkowsky","ptbiserial","gap","mcclain","gamma","gplus","tau",
"sdindex")
res = NbClust(data = t(enrich_res_OR), method = clust_method, index=nbclust_index)
a = data.frame(res$Best.nc[1,]); colnames(a) = "k"; a$index = rownames(a)
best_nk = as.numeric(names(table(a$k))[table(a$k)==max(table(a$k))])
# res = NbClust(data = t(enrich_res_OR), method = clust_method, index="cindex")
# final column partition taken from the "mcclain" index
res = NbClust(data = t(enrich_res_OR), method = clust_method, index="mcclain")
col_cols = brewer.pal(9, "Set1")[res$Best.partition]
feature_clusters = data.frame(feature = colnames(enrich_res_OR), cluster = col_cols)
# remap annotation colors to ggplot default hues
# NOTE(review): this assumes exactly two row clusters and two column clusters;
# any additional clusters would silently keep their Set1 colors -- confirm
new_row_cols = get_ggColorHue(6)
tmp_row_cols = unique(row_cols)
row_cols[row_cols==tmp_row_cols[1]] = new_row_cols[3]
row_cols[row_cols==tmp_row_cols[2]] = new_row_cols[4]
tmp_col_cols = unique(col_cols)
col_cols[col_cols==tmp_col_cols[1]] = new_row_cols[5]
col_cols[col_cols==tmp_col_cols[2]] = new_row_cols[6]
# annotated heatmap of the enrichment odds ratios
heatmap(as.matrix(enrich_res_OR),
hclustfun=function(d) hclust(d, method=clust_method),
scale = "none",
RowSideColors=row_cols,
ColSideColors=col_cols)
# plot this so you can screenshot the color scale
ggheatmap(as.matrix(enrich_res_OR), color=colorRampPalette(hcl.colors(12, "YlOrRd", rev = TRUE))(100), scale="none",legendName="OR")
# Label each subject with a cluster-derived subtype, reshape the enrichment
# odds ratios for plotting, and run per-subtype one-sample t-tests of each
# feature's OR against the null value of 1 (with FDR correction and Cohen's d).
subtype_df = data.frame(subid = rownames(enrich_res_OR), subtype = row_subtype)
table(subtype_df$subtype)
##
## 1 2
## 83 13
enrich_res_OR$subid = rownames(enrich_res_OR)
df2plot = merge(enrich_res_OR, subtype_df, by = "subid")
rownames(df2plot) = df2plot$subid
df2plot$subtype = factor(df2plot$subtype)
df4plot = melt(df2plot, id.vars = c("subid","subtype"))
h_test_res = list()
res_colnames = c("feature","t","p","d")
for (st in unique(row_subtype)){
st_label = sprintf("S%d",st)
# preallocate a feature-by-statistic results table for this subtype
res_tab = data.frame(matrix(nrow = length(sem_feat_names), ncol = length(res_colnames)))
rownames(res_tab) = sem_feat_names
colnames(res_tab) = res_colnames
# rows of df2plot belonging to this subtype (hoisted out of the inner loop)
st_rows = df2plot$subtype==st
for (feat in sem_feat_names){
res_tab[feat, "feature"] = feat
# one-sample t-test of this feature's ORs against the null of OR = 1
t_res = t.test(df2plot[st_rows, feat], mu=1)
res_tab[feat, "t"] = t_res$statistic
res_tab[feat, "p"] = t_res$p.value
# Cohen's d relative to the null OR of 1
feat_mu = mean(df2plot[st_rows, feat], na.rm = TRUE)
feat_sd = sd(df2plot[st_rows, feat], na.rm = TRUE)
res_tab[feat, "d"] = (feat_mu-1)/feat_sd
}
# FDR-correct across features within this subtype
res_tab$fdr = p.adjust(res_tab$p, method = "fdr")
h_test_res[[st_label]] = res_tab
}
# check if subtypes differ by sex
a = merge(pheno_data,df2plot[,c("subid","subtype")], by.x = "subj_ID",by.y = "subid")
chisq.test(table(a$sex, a$subtype))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(a$sex, a$subtype)
## X-squared = 0.058965, df = 1, p-value = 0.8081
# NOTE(review): reorder_vect is computed but never used below -- the display
# order is hardcoded in ordered_sem_feat_by_t; confirm the two agree
reorder_vect = order(h_test_res[["S2"]][,"t"])
# fixed feature display order (by S2 t-statistic)
ordered_sem_feat_by_t = c("gazed","touched","operability","touch","animal",
"sound","smell","nonsocial","taste","text","watchability",
"social","face","human","emotion","motion")
# print each subtype's hypothesis-test table in the fixed feature order
for (subtype in unique(row_subtype)){
subtype_name = sprintf("S%d",subtype)
print(subtype_name)
print(h_test_res[[subtype_name]][ordered_sem_feat_by_t,])
}
## [1] "S1"
## feature t p d fdr
## gazed gazed 2.1661232 0.0332055300 0.23776291 0.059032053
## touched touched 2.5479413 0.0127024533 0.27967289 0.025404907
## operability operability 0.3213331 0.7487755704 0.03527089 0.748775570
## touch touch -0.7376745 0.4628180138 -0.08097030 0.528934873
## animal animal 1.1698538 0.2454488564 0.12840814 0.327265142
## sound sound 1.5584495 0.1229780809 0.17106206 0.196764929
## smell smell 1.3352664 0.1854841340 0.14656453 0.269795104
## nonsocial nonsocial -0.5194587 0.6048402884 -0.05701800 0.645162974
## taste taste 2.5741125 0.0118480810 0.28254555 0.025404907
## text text 3.1121353 0.0025565751 0.34160123 0.008181040
## watchability watchability 2.7065642 0.0082693224 0.29708402 0.022051526
## social social 3.3726050 0.0011384602 0.37019149 0.006071788
## face face 3.5493575 0.0006424536 0.38959260 0.005139628
## human human 3.6098105 0.0005260897 0.39622818 0.005139628
## emotion emotion 3.2239396 0.0018157807 0.35387335 0.007263123
## motion motion 0.7709038 0.4429795787 0.08461769 0.528934873
## [1] "S2"
## feature t p d fdr
## gazed gazed 2.02586318 6.560453e-02 0.56187335 1.166303e-01
## touched touched 3.49893661 4.390461e-03 0.97043041 1.170790e-02
## operability operability -0.81363133 4.317048e-01 -0.22566073 5.313290e-01
## touch touch -2.57459856 2.433864e-02 -0.71406516 5.563118e-02
## animal animal 0.42147144 6.808622e-01 0.11689514 7.262530e-01
## sound sound 0.69520305 5.001691e-01 0.19281463 5.716219e-01
## smell smell -4.04854874 1.614529e-03 -1.12286539 5.166493e-03
## nonsocial nonsocial -12.54280065 2.949471e-08 -3.47874699 4.719154e-07
## taste taste -1.58125400 1.398048e-01 -0.43856095 2.033524e-01
## text text -0.83038716 4.225335e-01 -0.23030796 5.313290e-01
## watchability watchability -0.06352131 9.503972e-01 -0.01761764 9.503972e-01
## social social 9.96133582 3.734349e-07 2.76277747 2.987479e-06
## face face 4.71271372 5.032181e-04 1.30707161 2.012872e-03
## human human 7.42021807 8.051874e-06 2.05799821 4.294333e-05
## emotion emotion 2.08146317 5.946854e-02 0.57729402 1.166303e-01
## motion motion 1.77331216 1.015346e-01 0.49182830 1.624553e-01
# Order features for plotting and attach the feature-cluster color labels,
# then plot per-feature enrichment ORs faceted by subject subtype.
df4plot$variable = factor(df4plot$variable, levels = ordered_sem_feat_by_t)
colnames(df4plot)[colnames(df4plot)=="variable"] = "feature"
colnames(df4plot)[colnames(df4plot)=="value"] = "OR"
feature_clusters$feature = factor(feature_clusters$feature, levels = levels(df4plot$feature))
df4plot$feature_cluster = NA
for (ifeat in seq_along(sem_feat_names)){
feature_name2use = sem_feat_names[ifeat]
cluster2use = feature_clusters[feature_clusters$feature==feature_name2use, "cluster"]
df4plot$feature_cluster[df4plot$feature==feature_name2use] = cluster2use
}
df4plot$feature_cluster = factor(df4plot$feature_cluster)
fingerprint_profiles = df4plot
# horizontal line at OR = 1 marks no enrichment
p = ggplot(data = df4plot, aes(x = feature, y = OR, colour=feature_cluster)) +
facet_grid(subtype ~ .) +
geom_scatterbox() +
geom_hline(yintercept=1) +
xlab("Semantic Features") +
ylab("Odds Ratio") +
scale_colour_manual(values = c("#F564E3","#619CFF")) +
guides(colour="none") + # "none" replaces deprecated logical FALSE
coord_flip() + ggtitle(plot_title_name) + easy_center_title()
# pass p explicitly: ggsave() otherwise saves the last *displayed* plot,
# and p has not been printed yet at this point
ggsave(p, filename = file.path(plot_path, "fingerprintable_stimuli_semantic_enrichment.pdf"),
width = 4, height = 5)
p
# Join phenotype data with subtype labels and build a printable subtype2
# column ("S1"/"S2"; "NF" for subjects without a subtype).
colnames(pheno_data)[colnames(pheno_data)=="subj_ID"] = "subid"
df2use = right_join(df2plot, pheno_data, by="subid")
df2use$subtype2 = "NF"
unique_subtypes = levels(df2use$subtype)
for (subtype in unique_subtypes){
# which() guards against NA subtypes from the right_join: assigning through
# a logical index containing NA is an error in R, while which() drops NAs
df2use$subtype2[which(df2use$subtype==subtype)] = sprintf("S%s",subtype)
}
df2use = merge(df2use, fp_res, by.x = "subid", by.y = "subids")
# subjects missing more than 15% of stimuli are excluded
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)
mean_fpr_per_sem_feat_res = mean_fpr_per_sem_feat_res %>% filter(!na_mask)
mean_fpr_per_sem_feat_res$subid = rownames(mean_fpr_per_sem_feat_res)
# PCA to combine AQ and SRS into a single autistic-traits dimension (at_pc1)
pca_res = princomp(scale(new_df[,c("AQ_Tot","SRS_Tot_Raw")]))
summary(pca_res)
## Importance of components:
## Comp.1 Comp.2
## Standard deviation 1.3269348 0.4692510
## Proportion of Variance 0.8888431 0.1111569
## Cumulative Proportion 0.8888431 1.0000000
new_df$at_pc1 = pca_res$scores[,1]
# attach GUI stats and the fingerprintable-stimuli counts
new_df = merge(new_df, gui_res_sub, by = "subid")
new_df = merge(new_df, df2use[,c("subid","nfingerprintable_stimuli")])
# compute median intra-subject similarity
a = data.frame(med_intrasub=rowMedians(as.matrix(intrasubR_sub_by_stim), na.rm=TRUE), subid=rownames(intrasubR_sub_by_stim))
a = a %>% filter(!na_mask)
new_df = merge(new_df, a, by ="subid")
# compute median avg inter-subject similarity
a = data.frame(med_intersub=rowMedians(as.matrix(mean_intersubR_sub_by_stim), na.rm=TRUE), subid=rownames(mean_intersubR_sub_by_stim))
a = a %>% filter(!na_mask)
new_df = merge(new_df, a, by ="subid")
# global gui is median intra-sub divided by median avg inter-sub
new_df$global_gui = new_df$med_intrasub/new_df$med_intersub
# PCA gaze fingerprinting metrics: median GUI ("all") + n fingerprintable stimuli
pca_res = princomp(scale(new_df[,c("all","nfingerprintable_stimuli")]))
summary(pca_res)
## Importance of components:
## Comp.1 Comp.2
## Standard deviation 1.3420773 0.42400585
## Proportion of Variance 0.9092452 0.09075481
## Cumulative Proportion 0.9092452 1.00000000
new_df$fp_pc1 = pca_res$scores[,1]
new_df$fp_pc2 = pca_res$scores[,2]
# define who is an autistic traits outlier by either SRS or AQ
new_df$AQ_outlier = "No"
new_df$AQ_outlier[new_df$AQ_Tot>26] = "Yes" # Woodbury-Smith et al., 2005 JADD cut-off
new_df$SRS_outlier = "No"
new_df$SRS_outlier[new_df$SRS_Tot_Raw>80.81] = "Yes" # 2 SD cut-off for TD Italian adults from SRS-2 manual
# outlier on either questionnaire counts as an autistic-traits outlier
new_df$AT_outlier = new_df$SRS_outlier=="Yes" | new_df$AQ_outlier=="Yes"
# test correlation between AQ and SRS
cor.test(new_df$AQ_Tot, new_df$SRS_Tot_Raw)
##
## Pearson's product-moment correlation
##
## data: new_df$AQ_Tot and new_df$SRS_Tot_Raw
## t = 12.555, df = 103, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6886711 0.8436072
## sample estimates:
## cor
## 0.7776862
# scatterplot of AQ vs SRS with robust (lmrob) fit; outliers highlighted
p = ggplot(data = new_df, aes(x = AQ_Tot, y = SRS_Tot_Raw)) +
geom_point(aes(colour = AT_outlier)) + # inherits data/x/y from ggplot()
geom_smooth(method = lmrob, colour="black") + guides(colour="none") +
xlab("AQ Total") + ylab("SRS Total")
ggsave(p, filename = file.path(plot_path, "AQ_by_SRS.pdf"))
p
# test correlation between number of fingerprintable stimuli and median GUI
cor.test(new_df$nfingerprintable_stimuli, new_df$all)
##
## Pearson's product-moment correlation
##
## data: new_df$nfingerprintable_stimuli and new_df$all
## t = 14.459, df = 103, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7434556 0.8731752
## sample estimates:
## cor
## 0.8184904
# scatterplot with robust fit; guides("none") replaces deprecated logical FALSE
p = ggplot(data = new_df, aes(x = nfingerprintable_stimuli, y = all)) +
geom_point() +
geom_smooth(method = lmrob, colour="black") + guides(colour="none") +
xlab("Fingerprintable Stimuli") + ylab("Median Gaze Uniqueness Index (GUI)")
ggsave(p, filename = file.path(plot_path, "nfingerprintable_stimuli_by_medianGUI.pdf"))
p
# Predicting fingerprinting simply by data quality metrics
# lmrob() = MM-type robust linear regression (robustbase). Each model below
# regresses a fingerprinting metric on eye-tracker calibration error; the
# recorded outputs show all R^2 near zero (data quality does not explain
# fingerprinting).
# run linear model with FP PC1 predicted by session 1 calibration error
form2use = as.formula(sprintf("fp_pc1 ~ %s","S1_CalError_OSIE"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05343919 0.3155794 -0.169336760 0.8658639
## S1_CalError_OSIE 0.00446718 0.6249176 0.007148431 0.9943103
summary(mod2use)$r.squared
## [1] 9.375862e-07
summary(mod2use)$adj.r.squared
## [1] -0.009707791
# run linear model with FP PC1 predicted by session 2 calibration error
form2use = as.formula(sprintf("fp_pc1 ~ %s","S2_CalError_OSIE"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1362535 0.3555134 0.3832583 0.7023182
## S2_CalError_OSIE -0.3877509 0.6915105 -0.5607303 0.5761992
summary(mod2use)$r.squared
## [1] 0.004157037
summary(mod2use)$adj.r.squared
## [1] -0.005511341
# run linear model with nfingerprintable_stimuli predicted by session 1 calibration error
form2use = as.formula(sprintf("nfingerprintable_stimuli ~ %s","S1_CalError_OSIE"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.9199604 5.558861 5.38239010 4.641985e-07
## S1_CalError_OSIE -0.1978763 11.459405 -0.01726759 9.862565e-01
summary(mod2use)$r.squared
## [1] 1.05309e-05
summary(mod2use)$adj.r.squared
## [1] -0.009698105
# run linear model with nfingerprintable_stimuli predicted by session 2 calibration error
form2use = as.formula(sprintf("nfingerprintable_stimuli ~ %s","S2_CalError_OSIE"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.678850 5.644934 5.6119075 1.698568e-07
## S2_CalError_OSIE -3.804149 10.084743 -0.3772182 7.067875e-01
summary(mod2use)$r.squared
## [1] 0.00239626
summary(mod2use)$adj.r.squared
## [1] -0.007289213
# run linear model with GUI predicted by session 1 calibration error
# ("all" = median GUI over all stimuli)
form2use = as.formula(sprintf("all ~ %s","S1_CalError_OSIE"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.29746976 0.02659460 48.78695654 5.269852e-73
## S1_CalError_OSIE -0.00183596 0.04921164 -0.03730744 9.703121e-01
summary(mod2use)$r.squared
## [1] 1.698832e-05
summary(mod2use)$adj.r.squared
## [1] -0.009691585
# run linear model with GUI predicted by session 2 calibration error
form2use = as.formula(sprintf("all ~ %s","S2_CalError_OSIE"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.31075703 0.03194163 41.0360029 1.214480e-65
## S2_CalError_OSIE -0.02959576 0.06440776 -0.4595061 6.468391e-01
summary(mod2use)$r.squared
## [1] 0.002615422
summary(mod2use)$adj.r.squared
## [1] -0.007067923
# Using IQ as a predictor of gaze fingerprinting or autistic traits
# (both models below show IQ is not a significant predictor)
# run linear model with FP PC1 predicted by IQ
form2use = as.formula(sprintf("fp_pc1 ~ %s","IQ"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.39247647 1.08994048 -1.277571 0.2047232
## IQ 0.01174875 0.01017254 1.154948 0.2512034
summary(mod2use)$r.squared
## [1] 0.0134611
summary(mod2use)$adj.r.squared
## [1] 0.002376395
# run linear model with autistic traits PC1 predicted by IQ
form2use = as.formula(sprintf("at_pc1 ~ %s","IQ"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7531484 0.855355830 0.8805089 0.3809552
## IQ -0.0072412 0.008138201 -0.8897790 0.3759834
summary(mod2use)$r.squared
## [1] 0.007843393
summary(mod2use)$adj.r.squared
## [1] -0.003304434
# Predicting autistic traits with fingerprintability
# (fp_pc1 is a significant negative predictor in all three models below)
# run linear model with sex and FP PC1 and PC2 predicting AQ
form2use = as.formula(sprintf("AQ_Tot ~ sex + %s + %s","fp_pc1","fp_pc2"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.364503 0.8243157 18.639100 1.824882e-34
## sexM 1.813738 1.0910621 1.662360 9.954127e-02
## fp_pc1 -1.805911 0.3595318 -5.022952 2.199506e-06
## fp_pc2 -1.810874 1.2312633 -1.470745 1.444687e-01
summary(mod2use)$r.squared
## [1] 0.2113595
summary(mod2use)$adj.r.squared
## [1] 0.1879345
# run linear model with sex and FP PC1 and PC2 predicting SRS
form2use = as.formula(sprintf("SRS_Tot_Raw ~ sex + %s + %s","fp_pc1","fp_pc2"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.780824 3.098048 12.5178240 2.893014e-22
## sexM 7.296496 3.674221 1.9858619 4.975904e-02
## fp_pc1 -5.219800 1.334051 -3.9127433 1.656820e-04
## fp_pc2 -2.344254 4.072038 -0.5756956 5.661015e-01
summary(mod2use)$r.squared
## [1] 0.16288
summary(mod2use)$adj.r.squared
## [1] 0.138015
# run linear model with sex and FP PC1 and PC2 predicting autistic traits PC1
form2use = as.formula(sprintf("at_pc1 ~ sex + %s + %s","fp_pc1","fp_pc2"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3842312 0.16477950 -2.331790 2.169725e-02
## sexM 0.4375644 0.20101004 2.176829 3.182307e-02
## fp_pc1 -0.3571742 0.06909929 -5.168999 1.191564e-06
## fp_pc2 -0.2674932 0.21818839 -1.225974 2.230594e-01
summary(mod2use)$r.squared
## [1] 0.2266718
summary(mod2use)$adj.r.squared
## [1] 0.2037017
# model comparisons - gaze fingerprinting PC1, median intra-sub, median inter-sub
# NOTE(review): despite the column name, "Percentage Variance Explained"
# stores r.squared as a proportion in [0,1], not a percentage
model_variances = data.frame(matrix(nrow = 3, ncol = 2))
rownames(model_variances) = c("Gaze Fingerprinting PC1","Median Intra-Subject Similarity","Median Inter-Subject Similarity")
colnames(model_variances) = c("Model","Percentage Variance Explained")
# run linear model with sex and FP PC1 predicting autistic traits PC1
form2use = as.formula(sprintf("at_pc1 ~ sex + %s","fp_pc1"))
model1 = lmrob(data = new_df, formula = form2use)
summary(model1)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4082435 0.16627080 -2.455293 1.576825e-02
## sexM 0.4766891 0.20125182 2.368620 1.973808e-02
## fp_pc1 -0.3558846 0.07041452 -5.054136 1.906856e-06
summary(model1)$r.squared
## [1] 0.2139194
model_variances["Gaze Fingerprinting PC1","Model"] = "Gaze Fingerprinting PC1"
model_variances["Gaze Fingerprinting PC1","Percentage Variance Explained"] = summary(model1)$r.squared
summary(model1)$adj.r.squared
## [1] 0.1985061
# run linear model with sex and median intrasub-similarity
form2use = as.formula(sprintf("at_pc1 ~ sex + %s","med_intrasub"))
model2 = lmrob(data = new_df, formula = form2use)
summary(model2)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6191083 0.7904356 2.048375 0.04309180
## sexM 0.4421196 0.2134600 2.071206 0.04086372
## med_intrasub -3.7449721 1.4378185 -2.604621 0.01057109
summary(model2)$r.squared
## [1] 0.1159003
model_variances["Median Intra-Subject Similarity","Model"] = "Median Intra-Subject Similarity"
model_variances["Median Intra-Subject Similarity","Percentage Variance Explained"] = summary(model2)$r.squared
summary(model2)$adj.r.squared
## [1] 0.09856502
# run linear model with sex and median avg intersub-similarity
form2use = as.formula(sprintf("at_pc1 ~ sex + %s","med_intersub"))
model3 = lmrob(data = new_df, formula = form2use)
summary(model3)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.37943581 1.2034072 -0.31530126 0.75317709
## sexM 0.38422092 0.2231596 1.72173179 0.08814948
## med_intersub 0.07526656 2.8775958 0.02615606 0.97918397
summary(model3)$r.squared
## [1] 0.02966478
model_variances["Median Inter-Subject Similarity","Model"] = "Median Inter-Subject Similarity"
model_variances["Median Inter-Subject Similarity","Percentage Variance Explained"] = summary(model3)$r.squared
summary(model3)$adj.r.squared
## [1] 0.0106386
# scatter of autistic traits PC1 vs gaze fingerprinting PC1 with robust fit
feature2use = "fp_pc1"
p = ggplot(data = new_df, aes(x = .data[[feature2use]], y = at_pc1)) +
geom_point(aes(colour = AT_outlier)) + # inherits data/x/y from ggplot()
geom_smooth(method = lmrob, colour="black") + guides(colour="none") +
ylab("Autistic Traits PC1") + xlab("Gaze Fingerprinting PC1")
ggsave(p, filename = file.path(plot_path, "autistic_traits_PC1_by_fingerprinting_PC1.pdf"))
p
# bar plot comparing variance explained across the three candidate models
x_var = "Percentage Variance Explained"
y_var = "Model"
model_variances$Model = factor(model_variances$Model,
levels = rev(c("Gaze Fingerprinting PC1",
"Median Intra-Subject Similarity",
"Median Inter-Subject Similarity")))
p = ggplot(data = model_variances, aes(x = .data[[x_var]], y = .data[[y_var]])) +
geom_bar(stat = "identity") + xlab("Percentage Variance Explained in Autistic Traits PC1")
ggsave(p, filename = file.path(plot_path, "autistic_traits_model_comparisons.pdf"))
p